Introduction

Divvy Database Creation

Database connection

Establish postgresql database connection

# Read config
config <- read.ini("resources/db_config.ini")
db <- config$postgresql

# Safe database connection
tryCatch({
  con <- dbConnect(
    Postgres(),
    host = db$host,
    dbname = db$database,
    user = db$user,
    password = db$password,
    port = as.integer(db$port)
  )
}, error = function(e) {
  stop("Database connection failed: ", e$message)
})

# Register connection for SQL chunks
knitr::opts_chunk$set(connection = con)

Database schema

Create database schema to the database

CREATE SCHEMA IF NOT EXISTS divvy;

Database tables

Read data into R environment

# Define year and months
year <- "2024"
months <- sprintf("%02d", 1:12)

# Initialize list to store data
divvy_data <- list()

# Loop through months and read each file
for (m in months) {
  file_path <- paste0("resources/data/", year, m, "-divvy-tripdata.csv")
  month_name <- tolower(format(as.Date(paste0(year, "-", m, "-01")), "%B"))
  
  divvy_data[[month_name]] <- read_csv(file_path, show_col_types = FALSE)
}

Drop tables if already exists

# Define all month names
months <- tolower(month.name)

for (m in months) {
  sql <- glue::glue("DROP TABLE IF EXISTS divvy.{m};")
  DBI::dbExecute(con, sql)
}

Create and load data for the month of January

# Create table
create_jan_table <- "
  CREATE TABLE divvy.january (
      ride_id             TEXT PRIMARY KEY,
      rideable_type       TEXT,
      started_at          TIMESTAMP,
      ended_at            TIMESTAMP,
      start_station_name  TEXT,
      start_station_id    TEXT,
      end_station_name    TEXT,
      end_station_id      TEXT,
      member_casual       TEXT
  );
"
DBI::dbExecute(con, create_jan_table)
## [1] 0
# Write to PostgreSQL (replaces table if exists)
dbWriteTable(
  conn      = con,
  name      = DBI::Id(schema = "divvy", table = "january"),
  value     = divvy_data$january,
  overwrite = TRUE,   # drop & recreate table
  row.names = FALSE
)

Create and load data for the month of February

# Create table
create_feb_table <- "
  CREATE TABLE divvy.february (
      ride_id             TEXT PRIMARY KEY,
      rideable_type       TEXT,
      started_at          TIMESTAMP,
      ended_at            TIMESTAMP,
      start_station_name  TEXT,
      start_station_id    TEXT,
      end_station_name    TEXT,
      end_station_id      TEXT,
      member_casual       TEXT
  );
"
DBI::dbExecute(con, create_feb_table)
## [1] 0
# Write to PostgreSQL (replaces table if exists)
dbWriteTable(
  conn      = con,
  name      = DBI::Id(schema = "divvy", table = "february"),
  value     = divvy_data$february,
  overwrite = TRUE,   # drop & recreate table
  row.names = FALSE
)

Database views

SELECT COUNT(*) FROM divvy.january;
1 records
count
144873
SELECT COUNT(*) FROM divvy.february;
1 records
count
223164

Database functions

Exploratory Data Analysis

Conclusion